import calendar
import sys
import pandas as pd
import csv
import pickle
import numpy as np
import pandas
from collections import Counter 
from geopy import distance
import re


def adjust_elementary(cstr):
    """Drop ' elementary ' / ' elementar ' from strings containing 'christian: '.

    Strings without the 'christian: ' marker are returned unchanged. The
    removed tokens include their surrounding spaces, so the neighbouring
    words end up joined together.
    """
    if 'christian: ' in cstr:
        # Longer spelling first, then the truncated variant.
        for token in (' elementary ', ' elementar '):
            cstr = cstr.replace(token, '')
    return cstr

def adjust_cumberland(cstr):
    """Strip the leading '<something> -' part from a 'cumberland:' key.

    For a string such as 'cumberland: 01 - upper allen' the text up to and
    including the first ' -' after 'cumberland:' is dropped, giving
    'cumberland: upper allen'.

    The input is returned unchanged when it has no 'cumberland:' marker,
    when nothing but spaces accompanies the marker, or when there is no
    ' -' separator after the marker.
    """
    if 'cumberland:' not in cstr or cstr.replace(' ', '') == 'cumberland:':
        return cstr
    rest = cstr.split('cumberland:')[1]
    # Look for the separator in the part we are about to slice; a ' -' that
    # only occurs before 'cumberland:' used to raise ValueError in .index().
    if ' -' not in rest:
        return cstr
    keep = rest[rest.index(' -') + 2:].lstrip(' ')
    return 'cumberland: ' + keep





def _coordinate_missing(value):
    """Return True for null values and for blank / whitespace-only strings."""
    if isinstance(value, str):
        return not value.strip()
    return bool(pd.isnull(value))


def get_distance(olat,olong,lat,long):
    """Return the distance in miles between (olat, olong) and (lat, long).

    Args:
        olat, olong: origin latitude / longitude.
        lat, long: destination latitude / longitude.

    Returns:
        The ``.mi`` value of ``distance.distance`` for the two points, or
        ``np.nan`` when any of the four coordinates is missing.

    Coordinates read from a CSV arrive as strings, and ``pd.isnull('')`` is
    False, so blank strings are treated as missing here rather than being
    handed to the distance calculation.
    """
    if any(_coordinate_missing(c) for c in (olat, olong, lat, long)):
        return np.nan
    return distance.distance((olat, olong), (lat, long)).mi


def match_a_row(row,headers,state,year,admin_info):
    """Look up the UID for one voter-file row using the state's row matcher.

    Only 'pa' has a row matcher registered; any other ``state`` raises
    KeyError from the dispatch lookup.
    """
    row_matchers = {'pa': map_to_state_code_pa}
    matcher = row_matchers[state]
    return matcher(row, headers, year, admin_info)

def match_a_state(state,year,admin_info):
    """Build the precinct-key -> UID lookup for ``state`` and ``year``.

    Dispatches to the state's ``map_state_*`` builder. A ``state`` without
    a registered builder raises KeyError.
    """
    builders = {
        'nc': map_state_nc,
        'in': map_state_in,
        'tx': map_state_tx,
        'ut': map_state_ut,
        'pa': map_state_pa,
        'ri': map_state_ri,
        'wi': map_state_wi,
        'md': map_state_md,
        'ia': map_state_ia,
    }
    build = builders[state]
    # Note the argument order: builders take (admin_info, year).
    return build(admin_info, year)


def clean_county(cstring):
    """Return ``cstring`` as lower-case text with every ' county' removed."""
    lowered = str(cstring).lower()
    return lowered.replace(' county', '')

def fix_num(a_num):
    """Format a numeric code as an integer string at least two characters wide.

    Args:
        a_num: a number, numeric string, or arbitrary value.

    Returns:
        '' for null input; the integer part as text, left-padded with '0'
        to two characters, when the value converts to a number; otherwise
        ``str(a_num)`` unchanged.
    """
    if pd.isnull(a_num):
        return ''
    try:
        whole = int(float(a_num))
    except (TypeError, ValueError, OverflowError):
        # Not numeric (or inf): fall back to the plain text form.
        return str(a_num)
    # Only single-digit non-negative values are shorter than two characters.
    return str(whole).zfill(2)

def map_to_state_code_pa(row,headers,year,match_info):
    """Return the UID for one voter row, or NaN when the key is unknown.

    The lookup key is '<county>: <pid>' built from the row's lower-cased
    'county' and 'pid' columns. ``headers`` maps column names to indexes
    in ``row``. ``year`` is not used.
    """
    county = str(row[headers['county']]).lower()
    precinct = str(row[headers['pid']]).lower()
    return match_info.get(county + ": " + precinct, np.nan)


def map_code_ia(v,year):
    """Build the Iowa key '<county>: <city> <id>' for one record.

    The id comes from 'precinct_id' for year '2012' and from 'district_id'
    for year '2016'. Returns '' for any other year, or when the id,
    'county' or 'city' field is absent or null.
    """
    if year == '2012':
        id_field = 'precinct_id'
    elif year == '2016':
        id_field = 'district_id'
    else:
        return ''

    for field in (id_field, 'county', 'city'):
        if field not in v or pd.isnull(v[field]):
            return ''

    county = v['county'].lower()
    city = str(v['city']).lower()
    return county + ": " + city + ' ' + v[id_field].lower()

def map_state_ia(admin_info,year):
    """Map each non-empty Iowa key to its record's 'UID' for the given year.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_ia(record, year), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup


def fix_pa_fancy(county,pname,ts_county_pa):
    """Return '<county>: <precinct>' with the precinct normalised.

    The precinct name is first passed through check_date. If ``county``
    has an entry in ``ts_county_pa`` (a width in characters), the name is
    then left-padded with '0' up to that width.
    """
    normalised = check_date(pname, county)
    if county in ts_county_pa:
        normalised = normalised.rjust(ts_county_pa[county], '0')
    return county + ': ' + normalised

def check_date(pname,county):
    """Turn a '<number>-<month abbr>' precinct name into '<MM>-<number>'.

    For example '5-jan' becomes '01-05'. The number is left-padded with one
    '0' when it is below 10, except for county 'monroe'. The month number
    is always two digits.

    Args:
        pname: precinct name (lower-case text is expected by the pattern).
        county: county name, used only for the 'monroe' exception.

    Returns:
        The rewritten name, or ``pname`` unchanged when it contains no
        '<1-2 digits>-<text>' part or when the text after the dash is not a
        month abbreviation from ``calendar.month_abbr``.
    """
    # calendar.month_abbr[0] is '', which maps to 0 and is rejected below.
    month_numbers = {abbr.lower(): idx
                     for idx, abbr in enumerate(calendar.month_abbr)}
    date_part = re.search(r'\d{1,2}\-(.*[a-z])', pname)
    if date_part is None:
        return pname

    pieces = date_part.group(0).split('-')
    number, month_abbr = pieces[0], pieces[1]
    month = month_numbers.get(month_abbr, 0)
    if month == 0:
        # A dash followed by ordinary text (e.g. '3-ward') is not a date;
        # previously this raised KeyError.
        return pname

    if county != 'monroe' and int(number) < 10:
        number = '0' + number
    return '{:02d}-{}'.format(month, number)

def map_code_ri(v):
    """Return the record's 'precinct_name', or '' when absent or null."""
    if 'precinct_name' not in v:
        return ''
    name = v['precinct_name']
    return '' if pd.isnull(name) else name

def map_state_ri(admin_info,year):
    """Map each non-empty Rhode Island key to its record's 'UID' for ``year``.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_ri(record), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup

def map_code_pa(v,ts_counties):
    """Build the Pennsylvania key '<county>: <precinct_id>' for one record.

    Returns '' when 'precinct_id' or 'county' is absent or null. Records
    whose 'county' is exactly 'delaware' use the lower-cased id as-is; all
    others are normalised through fix_pa_fancy with ``ts_counties``.
    """
    for field in ('precinct_id', 'county'):
        if field not in v or pd.isnull(v[field]):
            return ''

    county = str(v['county']).lower()
    precinct = str(v['precinct_id']).lower()
    # The comparison is made on the raw value, before lower-casing.
    if v['county'] == 'delaware':
        return county + ": " + precinct
    return fix_pa_fancy(county, precinct, ts_counties)

def get_pa_counties(year):
    """Find PA counties whose all-digit precinct ids share one length.

    Reads the 'pid' and 'county' columns of ``pa/<year>.csv`` (relative to
    the working directory, since the root prefix is empty).

    Args:
        year: year string used in the file name.

    Returns:
        Dict mapping lower-cased county name to the id length, including
        only counties where every all-digit 'pid' has the same length.
    """
    vote_root = ""
    fn = vote_root + '{}/{}.csv'.format('pa', year)
    # NOTE(review): no dtype is given, so pandas infers it; an all-numeric
    # 'pid' column would presumably lose leading zeros before the length is
    # measured here -- confirm against the real files.
    df = pd.read_csv(fn, usecols=['pid', 'county'])

    lengths_by_county = {}
    for pid, county in zip(df['pid'], df['county']):
        # Use the same normalised key for creation and lookup; indexing with
        # the raw county raised KeyError for any capitalised or NaN value.
        seen = lengths_by_county.setdefault(str(county).lower(), set())
        pid_text = str(pid)
        if re.match('^[0-9]+$', pid_text):
            seen.add(len(pid_text))

    return {county: next(iter(seen))
            for county, seen in lengths_by_county.items()
            if len(seen) == 1}

def map_state_pa(admin_info,year):
    """Map each non-empty Pennsylvania key to its record's 'UID' for ``year``.

    The per-county id widths come from get_pa_counties(year). Records whose
    'year' differs from ``year`` are skipped; when several records produce
    the same key, the last one wins.
    """
    widths = get_pa_counties(year)
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_pa(record, widths), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup

def fix_ind(cstr):
    """Normalise an Indiana '<county>: <precinct>' key.

    Collapses every run of whitespace to a single space (also trimming the
    ends) and rewrites 'st. joseph' as 'st joseph'.
    """
    # NOTE(review): the previous version also had a 'vigo:' branch that
    # zero-padded the first number into a local variable and then discarded
    # it, so it never affected the result. That dead code is removed here;
    # if padding vigo precinct numbers was intended, it still needs to be
    # implemented and checked against the voter-file keys.
    collapsed = ' '.join(cstr.split())
    return collapsed.replace('st. joseph', 'st joseph')

def map_code_in(v):
    """Build the Indiana key '<county>: <precinct_name>' for one record.

    Both parts are lower-cased and the result is passed through fix_ind.
    Returns '' when 'precinct_name' is absent or null.
    """
    if 'precinct_name' not in v or pd.isnull(v['precinct_name']):
        return ''
    county = str(v['county']).lower()
    precinct = str(v['precinct_name']).lower()
    return fix_ind(county + ": " + precinct)

def map_state_in(admin_info,year):
    """Map each non-empty Indiana key to its record's 'UID' for ``year``.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_in(record), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup



def map_state_wi(admin_info,year):
    """Map each non-empty Wisconsin key to its record's 'UID' for ``year``.

    Only records for ``year`` that have both a 'city' and a
    'precinct_name' entry are considered; when several records produce the
    same key, the last one wins.
    """
    # NOTE(review): map_code_wi is not defined anywhere in the visible
    # source; unless it is provided elsewhere this raises NameError as soon
    # as one record qualifies.
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        if 'city' not in record or 'precinct_name' not in record:
            continue
        key, uid = map_code_wi(record, year), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup

def pad_zeros_tx(pid,county):
    """Zero-pad the first number in a Texas precinct id to four digits.

    Args:
        pid: precinct id; converted with ``str`` unless county is 'wood'.
        county: county name; ids for 'wood' are returned untouched.

    Returns:
        For other counties: dashes are removed, the first run of digits is
        rewritten as an integer left-padded with '0' to four digits, the
        text after that run is kept, and a trailing '.0' is stripped. Any
        text before the first digit run is dropped. Ids without digits are
        returned with only the dashes removed.
    """
    if county == 'wood':
        return pid
    pid = str(pid).replace('-', '')

    first_run = re.search(r"\d+", pid)
    if first_run is None:
        return pid

    # zfill replaces the old "multiply by 10 until >= 1000" loop, which
    # never terminated when the number was 0.
    padded = str(int(first_run.group())).zfill(4)
    pid = padded + pid[first_run.end():]
    if pid[-2:] == '.0':
        # Ids that went through a float conversion carry a '.0' tail.
        pid = pid[:-2]
    return pid


def map_code_tx(v):
    """Build the Texas key '<county>: <padded precinct_id>' for one record.

    The county is lower-cased with '_' replaced by ' '; the id is
    lower-cased and passed through pad_zeros_tx together with the
    lower-cased county. Returns '' when 'precinct_id' is absent or null.
    """
    if 'precinct_id' not in v or pd.isnull(v['precinct_id']):
        return ''
    county = str(v['county']).lower()
    precinct = pad_zeros_tx(str(v['precinct_id']).lower(), county)
    return county.replace('_', ' ') + ": " + precinct


def map_state_tx(admin_info,year):
    """Map each non-empty Texas key to its record's 'UID' for ``year``.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_tx(record), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup


def clean_utah(pname):
    """Extract the first number, plus an optional letter suffix, from ``pname``.

    The suffix is a '-' or space followed by letters/spaces ending in a
    letter (e.g. '5 a'). Returns '' when no line of ``pname`` yields a
    number.
    """
    found = re.search(r"(?im)^\D*(\d+(?:[- ][a-z ]*[a-z])?)", pname)
    if found is None:
        return ''
    return found.group(1)

def map_code_ut(v):
    """Build the four candidate Utah keys for one record.

    Args:
        v: record mapping; 'city' and 'precinct_name' are read if present.

    Returns:
        A list of four strings, each prefixed by the lower-cased city:
          1. the number (with optional suffix) found by clean_utah,
          2. that number as a plain integer ('' when it is not an integer),
          3. the whole cleaned precinct name,
          4. the third key with ':00' removed.
        When 'precinct_name' is absent or null, four empty strings are
        returned (this case previously raised UnboundLocalError).
    """
    city = str(v['city']) if 'city' in v else ''
    if 'precinct_name' not in v or pd.isnull(v['precinct_name']):
        return ['', '', '', '']

    raw_name = str(v['precinct_name'])
    # Names containing 'slc' are forced onto 'salt lake city' regardless of
    # the recorded city. Uses the guarded ``city`` value, so a record
    # without a 'city' entry no longer raises KeyError here.
    if 'slc' in raw_name.lower() and city.lower() != 'salt lake city':
        city = 'salt lake city'
    city = city.lower()

    cleaned = raw_name.strip(' ').lower().replace('-', '')
    num = clean_utah(cleaned)
    tkeyone = city + " " + num
    try:
        tkeytwo = city + " " + str(int(num))
    except ValueError:
        # No number, or a number with a letter suffix such as '5 a'.
        tkeytwo = ''
    tkeythree = city + " " + cleaned
    tkeyfour = tkeythree.replace(':00', '')

    return [tkeyone, tkeytwo, tkeythree, tkeyfour]

def map_state_ut(admin_info,year):
    """Map every non-empty candidate Utah key to its record's 'UID'.

    Each record for ``year`` contributes all keys returned by map_code_ut;
    a key produced by more than one record keeps the last UID.
    """
    candidates = []
    for record in admin_info.values():
        if record['year'] == year:
            candidates.append((map_code_ut(record), record['UID']))

    lookup = {}
    for keys, uid in candidates:
        lookup.update((key, uid) for key in keys if key != '')
    return lookup


def fix_nc(pid):
    """Normalise the first decimal number inside a North Carolina precinct id.

    Args:
        pid: precinct id text.

    Returns:
        ``pid`` cut off after its first '<digits>.<digits>' number, with
        that number re-rendered through ``float`` (so '5.10' becomes '5.1')
        and prefixed with '0' when it is below 10. Text after the number is
        dropped. Ids without such a number are returned unchanged. A
        non-string ``pid`` is printed and returned unchanged.
    """
    try:
        target = re.search(r"\d{1,5}\.\d{1,5}", pid)
    except TypeError:
        # Only the regex call can fail (non-string input); keep the
        # best-effort behaviour of reporting the value and passing it on.
        print(pid)
        return pid
    if target is None:
        return pid

    value = float(target.group())
    rendered = str(value)
    if value < 10:
        rendered = '0' + rendered
    return pid[:target.start()] + rendered



def map_code_nc(v):
    """Build the North Carolina key '<county>: <precinct_name>' for one record.

    Whitespace runs in the precinct name are collapsed, the name is passed
    through fix_nc, and both parts are lower-cased. Returns '' when
    'precinct_name' or 'county' is absent or null.
    """
    for field in ('precinct_name', 'county'):
        if field not in v or pd.isnull(v[field]):
            return ''
    county = str(v['county']).lower()
    collapsed = ' '.join(v['precinct_name'].split())
    return county + ": " + str(fix_nc(collapsed)).lower()

def map_state_nc(admin_info,year):
    """Map each non-empty North Carolina key to its record's 'UID' for ``year``.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_nc(record), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup


def map_code_md(v):
    """Build the Maryland key '<county>: <district>-<precinct>' for one record.

    The county is lower-cased, stripped of surrounding spaces and of
    apostrophes. Districts 0-9 get one leading '0'. Precincts 0-9 get two
    leading '0's and precincts of 10 or more get one. Returns '' when
    'district_id', 'precinct_id' or 'county' is absent or null.
    """
    required = ('district_id', 'precinct_id', 'county')
    if any(field not in v for field in required):
        return ''
    if any(pd.isnull(v[field]) for field in ('district_id', 'county', 'precinct_id')):
        return ''

    district = int(float(v['district_id']))
    precinct = int(float(v['precinct_id']))
    county = str(v['county']).lower().strip(' ').replace('\'', '')

    if 0 <= district < 10:
        district_text = '0{}'.format(district)
    else:
        district_text = str(district)

    if precinct >= 10:
        precinct_text = '0{}'.format(precinct)
    elif precinct >= 0:
        precinct_text = '00{}'.format(precinct)
    else:
        # Negative values are left as-is.
        precinct_text = str(precinct)

    return county + ": " + district_text + '-' + precinct_text

def map_state_md(admin_info,year):
    """Map each non-empty Maryland key to its record's 'UID' for ``year``.

    Records whose 'year' differs from ``year`` are skipped; when several
    records produce the same key, the last one wins.
    """
    lookup = {}
    for record in admin_info.values():
        if record['year'] != year:
            continue
        key, uid = map_code_md(record), record['UID']
        if key != '':
            lookup[key] = uid
    return lookup

def map_all(state,year,flag='',voter_file=False,server='local'):
    """Annotate a state's voter file with assigned polling place and distance.

    Reads ``administrative_information_<state><flag>.pkl``,
    ``physical_information_<state><flag>.pkl`` and ``<state>/<year>.csv``,
    then writes ``<state>_voter_file_with_distances_<year>.csv``. All paths
    are relative to the working directory because the root prefixes are
    empty. The output holds every input column followed by the assigned
    UID, that poll's latitude and longitude, and the distance in miles from
    the row's 'lat_registration' / 'long_registration' columns.

    Args:
        state: state code used in file names and for matcher dispatch.
        year: year string; selects records by their 'year' field.
        flag: suffix appended to the pickle names and output column names.
        voter_file: accepted for CLI compatibility; not used here.
        server: accepted for CLI compatibility; not used here.

    Raises:
        KeyError: if the input header lacks 'lat_registration' or
            'long_registration', or from the matcher dispatch.
    """
    poll_root = ""
    vote_root = ""
    write_root = ""

    # SECURITY NOTE: pickle.load can execute arbitrary code; only point this
    # at pickle files produced by a trusted step of this pipeline.
    with open(poll_root+'administrative_information_{}{}.pkl'.format(state,flag),'rb') as f:
        admin_info = pickle.load(f)
    with open(poll_root+'physical_information_{}{}.pkl'.format(state,flag),'rb') as f:
        phinfo = pickle.load(f)

    uid_to_coords = {v['UID']: (v['latitude'], v['longitude'])
                     for v in phinfo.values() if v['year'] == year}

    if state == 'hi':
        match_info = admin_info
    else:
        match_info = match_a_state(state, year, admin_info)
    # NOTE(review): match_a_row only registers a row matcher for 'pa', so
    # every other state raises KeyError on its first data row -- confirm
    # whether more row matchers are expected.

    in_path = vote_root+'{}/{}.csv'.format(state,year)
    out_path = write_root+'{}_voter_file_with_distances_{}.csv'.format(state,year)
    # newline='' on both files, as the csv module requires, so embedded
    # newlines are parsed correctly and no '\r\r\n' endings are written on
    # Windows.
    with open(in_path, newline='') as src, \
            open(out_path, 'w', newline='') as dst:
        reader = csv.reader(src)
        writer = csv.writer(dst)

        header = next(reader, None)
        if header is None:
            # Empty input: nothing to annotate.
            return
        headers = {name: idx for idx, name in enumerate(header)}
        writer.writerow(header + ['assigned_UID_{}{}'.format(year,flag),
                                  'assigned_poll_{}{}_latitude'.format(year,flag),
                                  'assigned_poll_{}{}_longitude'.format(year,flag),
                                  'distance_to_assigned_{}{}'.format(year,flag)])
        lat_col = headers['lat_registration']
        long_col = headers['long_registration']

        for row in reader:
            uid = match_a_row(row, headers, state, year, match_info)
            lat = np.nan
            longi = np.nan
            # Unmatched rows carry NaN (not a str); UIDs containing 'NA'
            # are treated as unassigned.
            if isinstance(uid, str) and 'NA' not in uid and uid in uid_to_coords:
                lat, longi = uid_to_coords[uid]
            dist_miles = get_distance(row[lat_col], row[long_col], lat, longi)
            writer.writerow(row + [uid, lat, longi, dist_miles])

if __name__ == '__main__':
    # Positional arguments: state [flag] [file_exists] [year] [server]
    cli_args = sys.argv[1:]

    state = cli_args[0]
    flag = cli_args[1] if len(cli_args) > 1 else ''
    if flag != '':
        # A non-empty flag is used as a '_<flag>' suffix.
        flag = '_' + flag
    # When given, file_exists stays the raw command-line string.
    file_exists = cli_args[2] if len(cli_args) > 2 else False
    year = cli_args[3] if len(cli_args) > 3 else "2012"
    server = cli_args[4] if len(cli_args) > 4 else "local"

    map_all(state, year, flag=flag, voter_file=file_exists, server=server)
